library(sf)
library(gstat)
library(ggplot2)
library(viridis)
library(cowplot)
library(dplyr)
library(ggrepel)

# ============================================================================
# SIMPLIFIED IDW INTERPOLATION VISUALIZATION
# ============================================================================

set.seed(456)

# ============================================================================
# STEP 1: SETUP - Create sample data with fewer points for clarity
# ============================================================================

comment_points <- data.frame(
  comment_id = 1:10,
  x = c(15000, 25000, 35000, 45000, 55000, 65000, 75000, 85000,
        20000, 40000),
  y = c(20000, 80000, 30000, 70000, 25000, 85000, 35000, 75000,
        50000, 55000),
  scalar_sum = c(0.25, 0.25, 0.25, 0.25, 0.25, 0.50, 0.25, 0.50, 0.25, 0.25)
) %>%
  st_as_sf(coords = c("x", "y"), crs = 32613)

# Create 10km grid
grid_boundary <- st_bbox(c(xmin = 0, xmax = 100000, 
                           ymin = 0, ymax = 100000), 
                         crs = 32613)

grid <- st_make_grid(
  st_as_sfc(grid_boundary),
  cellsize = 10000,
  what = "centers"
) %>%
  st_sf() %>%
  mutate(cell_id = row_number())

grid_polygons <- st_make_grid(
  st_as_sfc(grid_boundary),
  cellsize = 10000,
  what = "polygons"
) %>%
  st_sf() %>%
  mutate(cell_id = row_number())

# Extract coordinates for labeling
comment_coords <- st_coordinates(comment_points)
comment_points_df <- comment_points %>%
  st_drop_geometry() %>%
  bind_cols(as.data.frame(comment_coords))

# ============================================================================
# PLOT 1: SETUP - Sample Points
# ============================================================================

p1 <- ggplot() +
  geom_sf(data = grid_polygons, fill = NA, colour = "grey80", linewidth = 0.3) +
  geom_sf(data = comment_points, 
          aes(colour = scalar_sum, size = scalar_sum),
          alpha = 0.8) +
  scale_colour_viridis(option = "plasma", 
                       name = "Score",
                       limits = c(0, 10)) +
  scale_size_continuous(range = c(4, 10), guide = "none") +
  geom_text_repel(
    data = comment_points_df,
    aes(x = X, y = Y, label = sprintf("%.2f", scalar_sum)),
    colour = "black",
    bg.color = "white",
    bg.r = 0.15,
    fontface = "bold",
    size = 3,
    box.padding = 0.5,
    point.padding = 0.3,
    segment.color = "grey50",
    segment.size = 0.3,
    min.segment.length = 0.1,
    max.overlaps = Inf,
    seed = 123
  ) +
  labs(title = "Step 1: Setup",
       subtitle = "Comment locations with scores\nGrid: 10km × 10km cells") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(size = 10, colour = "grey40"),
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank()
  )

print(p1)

# ============================================================================
# STEP 2: DISTANCE CALCULATION - Show distances from one cell
# ============================================================================

# Select example cell (middle area)
example_cell_id <- 55
example_cell <- grid[example_cell_id, ]
example_cell_poly <- grid_polygons[example_cell_id, ]

# Calculate distances
distances <- st_distance(example_cell, comment_points)[1, ]
distances_km <- as.numeric(distances) / 1000

# Calculate weights
weights <- 1 / (distances^2)
weights_normalized <- weights / sum(weights)

weight_data <- data.frame(
  comment_id = comment_points$comment_id,
  scalar_sum = comment_points$scalar_sum,
  distance_km = distances_km,
  weight = as.numeric(weights_normalized),
  contribution = comment_points$scalar_sum * as.numeric(weights_normalized)
) %>%
  arrange(distance_km)

p2 <- ggplot() +
  geom_sf(data = grid_polygons, fill = "grey95", colour = "grey80", linewidth = 0.2) +
  geom_sf(data = example_cell_poly, fill = alpha("yellow", 0.5), 
          colour = "orange", linewidth = 1.5) +
  geom_segment(data = weight_data,
               aes(x = st_coordinates(example_cell)[1],
                   y = st_coordinates(example_cell)[2],
                   xend = st_coordinates(comment_points)[comment_id, 1],
                   yend = st_coordinates(comment_points)[comment_id, 2]),
               colour = "steelblue", linewidth = 0.6, linetype = "dashed") +
  geom_sf(data = comment_points, 
          aes(colour = scalar_sum, size = scalar_sum),
          alpha = 0.8) +
  scale_colour_viridis(option = "plasma", name = "Score") +
  scale_size_continuous(range = c(4, 10), guide = "none") +
  geom_sf_text(data = example_cell_poly,
               label = sprintf("Target\nCell %d", example_cell_id),
               colour = "black", fontface = "bold", size = 3.5) +
  geom_text_repel(
    data = weight_data %>% slice_tail(n = 5),
    aes(x = st_coordinates(comment_points)[comment_id, 1],
        y = st_coordinates(comment_points)[comment_id, 2],
        label = sprintf("d=%.1fkm", distance_km)),
    colour = "steelblue",
    fontface = "bold",
    size = 3,
    nudge_x = 5000,
    segment.color = "steelblue",
    bg.color = "white",
    bg.r = 0.1
  ) +
  labs(title = "Step 2: Distance Calculation",
       subtitle = "Calculate distance (d) from target cell to each comment point") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(size = 10, colour = "grey40"),
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank()
  )

print(p2)

# ============================================================================
# STEP 3: WEIGHT CALCULATION - Show weight decay
# ============================================================================

p3 <- ggplot(weight_data, aes(x = distance_km, y = weight * 100)) +
  geom_line(linewidth = 1.5, colour = "steelblue") +
  geom_point(aes(size = scalar_sum, colour = scalar_sum), alpha = 0.8) +
  scale_colour_viridis(option = "plasma", name = "Violence\nScore") +
  scale_size_continuous(range = c(3, 8), guide = "none") +
  geom_text_repel(
    data = weight_data %>% slice_head(n = 5),
    aes(label = sprintf("%.1f%%", weight * 100)),
    size = 3,
    nudge_y = 3,
    segment.color = "grey60"
  ) +
  labs(title = "Step 3: Weight Calculation",
       subtitle = "Weight = 1/d² (inverse square of distance)\nCloser points receive higher weights",
       x = "Distance from Target Cell (km)",
       y = "Weight (%)") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(size = 10, colour = "grey40"),
    panel.grid.minor = element_blank()
  )

print(p3)

# ============================================================================
# STEP 4: WEIGHTED AVERAGE - Show calculation
# ============================================================================

# Top 5 contributors
top_5 <- weight_data %>%
  arrange(desc(contribution)) %>%
  slice_head(n = 5)

interpolated_value <- sum(weight_data$contribution)

p4 <- ggplot(top_5, 
             aes(x = reorder(paste0("Point ", comment_id), contribution),
                 y = contribution)) +
  geom_col(fill = "steelblue", alpha = 0.8, width = 0.7) +
  geom_text(aes(label = sprintf("%.2f × %.1f%% = %.3f", 
                                scalar_sum, weight * 100, contribution)),
            hjust = -0.05, size = 3.5, fontface = "bold") +
  coord_flip() +
  scale_y_continuous(expand = expansion(mult = c(0, 0.25))) +
  labs(title = "Step 4: Weighted Average Calculation",
       subtitle = sprintf("IDW = Σ(score × weight) = %.2f\nShowing top 5 contributors (of %d total)",
                          interpolated_value, n_points),
       x = NULL,
       y = "Contribution to IDW Score") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(size = 10, colour = "grey40"),
    panel.grid.major.y = element_blank()
  )

print(p4)

# ============================================================================
# STEP 5: INTERPOLATION - Apply to all grid cells
# ============================================================================

# Perform IDW for all cells
idw_result <- idw(
  formula = scalar_sum ~ 1,
  locations = comment_points,
  newdata = grid,
  idp = 2
)

grid_with_idw <- grid_polygons %>%
  mutate(idw_score = idw_result$var1.pred)

p5 <- ggplot() +
  geom_sf(data = grid_with_idw, 
          aes(fill = idw_score),
          colour = "white", linewidth = 0.4) +
  scale_fill_viridis(option = "plasma", 
                     name = "IDW\nScore",
                     limits = c(0, 0.5)) +
  geom_sf(data = comment_points, 
          colour = "white", size = 2.5, shape = 21,
          fill = "black", stroke = 1) +
  labs(title = "Step 3: Interpolation Result",
       subtitle = "IDW applied to all grid cells\nBlack points = original observations") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(size = 10, colour = "grey40"),
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank()
  )

print(p5)

# ============================================================================
# STEP 6: COMPARISON - Different power parameters
# ============================================================================

idp_values <- c(1, 2, 3)
idw_comparison <- list()

for(p in idp_values) {
  idw_temp <- idw(
    formula = scalar_sum ~ 1,
    locations = comment_points,
    newdata = grid,
    idp = p
  )
  
  idw_comparison[[paste0("idp_", p)]] <- grid_polygons %>%
    mutate(
      idw_score = idw_temp$var1.pred,
      idp_label = paste("idp =", p)
    )
}

idw_comparison_df <- bind_rows(idw_comparison)

p6 <- ggplot() +
  geom_sf(data = idw_comparison_df,
          aes(fill = idw_score),
          colour = "white", linewidth = 0.3) +
  geom_sf(data = comment_points, 
          colour = "white", size = 1.5, shape = 21,
          fill = "black", stroke = 0.8) +
  scale_fill_viridis(option = "plasma", name = "IDW\nScore",
                     limits = c(0, .5)) +
  facet_wrap(~idp_label, ncol = 3) +
  labs(title = "Effect of Power Parameter") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(size = 10, colour = "grey40"),
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    strip.text = element_text(face = "bold", size = 11)
  )

print(p6)

# ============================================================================
# COMBINE INTO FINAL FIGURE
# ============================================================================

# 3x2 layout
combined_plot <- plot_grid(
  p1, p2,
  # p3, p4,
  p5, p6,
  ncol = 2,
  nrow = 2,
  align = "hv",
  rel_widths = c(1, 1)
)

final_plot <- ggdraw() +
  draw_plot(combined_plot, y = 0.02, height = 0.96) +
  draw_label("Inverse Distance Weighting (IDW) Interpolation: Step-by-Step Process",
             x = 0.5, y = 0.99, hjust = 0.5, vjust = 1,
             size = 15, fontface = "bold")

print(final_plot)

ggsave("idw_simplified_process.png", final_plot,
       width = 10, height = 8, dpi = 300, bg = "white")

# Save individual plots
ggsave("idw_step1_setup.png", p1, width = 7, height = 7, dpi = 300, bg = "white")
ggsave("idw_step2_distance.png", p2, width = 7, height = 7, dpi = 300, bg = "white")
ggsave("idw_step3_weights.png", p3, width = 7, height = 5, dpi = 300, bg = "white")
ggsave("idw_step4_calculation.png", p4, width = 9, height = 5, dpi = 300, bg = "white")
ggsave("idw_step5_result.png", p5, width = 7, height = 7, dpi = 300, bg = "white")
ggsave("idw_step6_comparison.png", p6, width = 12, height = 5, dpi = 300, bg = "white")

# ============================================================================
# STEP DESCRIPTIONS FOR DOCUMENTATION
# ============================================================================

cat("\n=== STEP-BY-STEP DESCRIPTIONS ===\n\n")

cat("STEP 1: SETUP\n")
cat("We begin with", n_points, "comment locations, each with a violence scalar score.\n")
cat("The study area is divided into a regular grid of 10km × 10km cells.\n")
cat("Goal: Interpolate scores to grid cells that may not contain any comments.\n\n")

cat("STEP 2: DISTANCE CALCULATION\n")
cat("For each target grid cell (e.g., Cell", example_cell_id, "), calculate the Euclidean\n")
cat("distance (d) from the cell centroid to every comment point.\n")
cat("These distances determine how much influence each point has on the cell.\n\n")

cat("STEP 3: WEIGHT CALCULATION\n")
cat("Calculate weights using the inverse distance squared formula: w = 1/d²\n")
cat("Closer points receive exponentially higher weights than distant points.\n")
cat("Weights are normalized so they sum to 100%.\n\n")

cat("STEP 4: WEIGHTED AVERAGE\n")
cat("The interpolated value for the target cell is the weighted average:\n")
cat("IDW = Σ(score × weight)\n")
cat("Example: Cell", example_cell_id, "IDW score =", round(interpolated_value, 2), "\n")
cat("Top contributors have the highest weights (closest points with high scores).\n\n")

cat("STEP 5: INTERPOLATION RESULT\n")
cat("Apply the IDW calculation to all", nrow(grid), "grid cells in the study area.\n")
cat("This creates a smooth, continuous surface of violence perception intensity.\n")
cat("Cells near high-score comments receive higher IDW values.\n\n")

cat("STEP 6: POWER PARAMETER EFFECT\n")
cat("The power parameter (idp) controls the rate of distance decay:\n")
cat("  idp = 1: Slower decay, smoother surface (more global influence)\n")
cat("  idp = 2: Balanced approach (commonly used)\n")
cat("  idp = 3: Faster decay, more localized patterns\n")
cat("We use idp = 2 in our analysis.\n\n")

cat("Visualizations saved successfully!\n")